from bs4 import BeautifulSoup
import os
import re
import csv

def rollcall(soup):
	"""Collect one record per roll-call vote table found in the parsed page.

	A table is treated as a roll-call vote when its text contains
	'Stemming/vote'.  Each record is a dict with the keys:

	category       always 'RCV'
	section        UTF-8 encoded text of the nearest preceding sibling whose
	               class list is exactly ['Titre2FR'], else 'NA'
	info           UTF-8 encoded digits taken from the first parenthesised
	               "(non-digits digits ...)" fragment of the table text,
	               else 'NA'
	article_range  always 'NA' for this category
	range_problem  always 'NA' for this category

	soup is expected to be a BeautifulSoup document: find_all() must return
	elements offering get_text() and previous_siblings.
	Returns the list of records in document order.
	"""
	votes = []
	for table in soup.find_all('table'):
		table_text = table.get_text()
		if 'Stemming/vote' not in table_text:
			continue
		vote = {'category': 'RCV', 'section': 'NA', 'info': 'NA', 'article_range': 'NA', 'range_problem': 'NA'}
		# Walk backwards from the table until the closest section heading.
		for previous_line in table.previous_siblings:
			try:
				classes = previous_line['class']
			except (KeyError, TypeError):
				# Text nodes cannot be indexed by attribute name (TypeError)
				# and tags without a class raise KeyError: not a heading.
				continue
			if classes == ['Titre2FR']:
				vote['section'] = previous_line.get_text().encode("utf-8")
				break
		# Group 2 holds the digits that follow the non-digit prefix inside
		# the parentheses; without a match 'info' keeps its 'NA' default.
		found = re.search(r'\((\D+)(\d+).*\)', table_text)
		if found is not None:
			vote['info'] = found.group(2).encode("utf-8")
		votes.append(vote)
	return votes

def _parse_range_bound(raw):
	"""Parse one end of an article range.

	Returns (value, clean).  clean is True when raw converts directly with
	int().  Otherwise the fallback pattern looks for digits that are
	preceded by a non-digit character; value is that number, or None when
	the pattern finds nothing, and clean is False.
	"""
	try:
		return int(raw), True
	except ValueError:
		pass
	candidates = re.findall(r'.*(\D)(\d+)\s*', raw)
	if not candidates:
		return None, False
	return int(candidates[0][1]), False


def article_range(text):
	"""Count the articles named in an enumeration such as "1 a 5, 7 et 9".

	text is split on ',' and on 'et'.  A piece containing the range marker
	(the two UTF-8 bytes C3 A0, i.e. a-grave) counts end - start + 1
	articles; any other piece counts as one article when it contains at
	least one digit.  No-break spaces (UTF-8 bytes C2 A0) are dropped from
	each piece first.

	Returns (number_articles, range_problem).  range_problem is 1 as soon as
	any piece was not a plain integer (or pair of plain integers), else 0.
	A range whose start or end yields no usable number is skipped and
	contributes nothing to the count.
	"""
	no_break_space = '\xc2\xa0'
	range_marker = '\xc3\xa0'
	number_articles = 0
	range_problem = 0
	for chunk in re.split(',|et', text):
		chunk = chunk.replace(no_break_space, '')
		if range_marker in chunk:
			bounds = chunk.split(range_marker)
			start, start_clean = _parse_range_bound(bounds[0])
			if not start_clean:
				range_problem = 1
			if start is None:
				# The end is deliberately not examined once the start is unusable.
				continue
			end, end_clean = _parse_range_bound(bounds[1])
			if not end_clean:
				range_problem = 1
			if end is None:
				continue
			number_articles += end - start + 1
		else:
			try:
				int(chunk)
			except ValueError:
				range_problem = 1
				# Not a bare number: still count it if it mentions any digit.
				if re.search(r'\d', chunk) is None:
					continue
			number_articles += 1
	return (number_articles, range_problem)

def articles(soup):
	"""Collect one record per article-by-article vote paragraph.

	A <p> element qualifies when its text, with carriage returns, newlines
	and spaces removed, contains 'articlepararticle'.  Each record is a
	dict with the keys:

	category       always 'article by article'
	section        UTF-8 encoded text of the nearest preceding sibling whose
	               class list is exactly ['Titre2FR'], else 'NA'
	info           UTF-8 encoded span running from the first to the last
	               digit of the paragraph (line breaks removed), else 'NA'
	article_range  article count computed by article_range() from 'info';
	               1 when the paragraph holds no such span
	range_problem  problem flag computed by article_range() from 'info';
	               1 when the paragraph holds no such span

	soup is expected to be a BeautifulSoup document.
	Returns the list of records in document order.
	"""
	votes = []
	for decision in soup.find_all('p'):
		flat_text = decision.get_text().replace('\r', '').replace('\n', '')
		if 'articlepararticle' not in flat_text.replace(' ', ''):
			continue
		vote = {'category': 'article by article', 'section': 'NA', 'info': 'NA', 'article_range': 'NA', 'range_problem': 'NA'}
		# Walk backwards from the paragraph until the closest section heading.
		for previous_line in decision.previous_siblings:
			try:
				classes = previous_line['class']
			except (KeyError, TypeError):
				# Text nodes cannot be indexed by attribute name (TypeError)
				# and tags without a class raise KeyError: not a heading.
				continue
			if classes == ['Titre2FR']:
				vote['section'] = previous_line.get_text().encode("utf-8")
				break
		found = re.search(r'\d+.*\d+', flat_text)
		if found is None:
			# No article numbers could be located: count one article and flag it.
			vote['article_range'] = 1
			vote['range_problem'] = 1
		else:
			vote['info'] = found.group(0).encode("utf-8")
			# Parse the enumeration once and unpack both results.
			vote['article_range'], vote['range_problem'] = article_range(vote['info'])
		votes.append(vote)
	return votes

# NOTE: a leftover module-level call to re.findall() on an undefined name
# `text` was removed from this spot; it raised NameError as soon as the
# script was run, before the definitions and the main loop below it.
	
def standup(soup):
	"""Collect one record per paragraph mentioning a stand-up vote.

	A <p> element qualifies when its text, with carriage returns, newlines
	and spaces removed, contains 'parassisetlev' (presumably the French
	phrase for a sitting-and-standing vote; confirm against the source
	pages).  Each record is a dict with the keys:

	category       always 'standup'
	section        UTF-8 encoded text of the nearest preceding sibling whose
	               class list is exactly ['Titre2FR'], else 'NA'
	info           UTF-8 encoded full text of the paragraph
	article_range  always 'NA' for this category
	range_problem  always 'NA' for this category

	soup is expected to be a BeautifulSoup document.
	Returns the list of records in document order.
	"""
	votes = []
	for decision in soup.find_all('p'):
		paragraph_text = decision.get_text()
		squeezed = paragraph_text.replace('\r', '').replace('\n', '').replace(' ', '')
		if 'parassisetlev' not in squeezed:
			continue
		vote = {'category': 'standup', 'section': 'NA', 'info': 'NA', 'article_range': 'NA', 'range_problem': 'NA'}
		# Walk backwards from the paragraph until the closest section heading.
		for previous_line in decision.previous_siblings:
			try:
				classes = previous_line['class']
			except (KeyError, TypeError):
				# Text nodes cannot be indexed by attribute name (TypeError)
				# and tags without a class raise KeyError: not a heading.
				continue
			if classes == ['Titre2FR']:
				vote['section'] = previous_line.get_text().encode("utf-8")
				break
		vote['info'] = paragraph_text.encode("utf-8")
		votes.append(vote)
	return votes
	
	
	
	
def secret(soup):
	"""Collect one record per paragraph announcing the close of a ballot.

	A <p> element qualifies when its text, with carriage returns, newlines
	and spaces removed, contains 'clarelescrutinclos' (presumably part of
	the French formula declaring a ballot closed; confirm against the
	source pages).  Each record is a dict with the keys:

	category       always 'secret'
	section        UTF-8 encoded text of the nearest preceding sibling whose
	               class list is exactly ['Titre2FR'] or ['Titre1FR'],
	               else 'NA'
	info           always 'NA' for this category
	article_range  always 'NA' for this category
	range_problem  always 'NA' for this category

	soup is expected to be a BeautifulSoup document.
	Returns the list of records in document order.
	"""
	heading_classes = [['Titre2FR'], ['Titre1FR']]
	votes = []
	for decision in soup.find_all('p'):
		squeezed = decision.get_text().replace('\r', '').replace('\n', '').replace(' ', '')
		if 'clarelescrutinclos' not in squeezed:
			continue
		vote = {'category': 'secret', 'section': 'NA', 'info': 'NA', 'article_range': 'NA', 'range_problem': 'NA'}
		# Walk backwards from the paragraph until the closest heading of
		# either level.
		for previous_line in decision.previous_siblings:
			try:
				classes = previous_line['class']
			except (KeyError, TypeError):
				# Text nodes cannot be indexed by attribute name (TypeError)
				# and tags without a class raise KeyError: not a heading.
				continue
			if classes in heading_classes:
				vote['section'] = previous_line.get_text().encode("utf-8")
				break
		votes.append(vote)
	return votes

# Script body: parse every file of the input directory and write one CSV row
# per vote detected by rollcall(), articles(), standup() and secret().
input_dir = '/Users/betuldemirkaya/Desktop/Belgium/54/'

# 'wb' is the mode the Python 2 csv module expects; the vote fields are
# already UTF-8 encoded byte strings.
with open('/Users/betuldemirkaya/Desktop/Belgium_votes.csv', 'wb') as csv_file:
	my_writer = csv.DictWriter(csv_file, fieldnames=['category', 'info', 'section', 'file','article_range','range_problem'])
	my_writer.writeheader()
	for file_name in os.listdir(input_dir):
		# Progress output; this call form prints the same text on Python 2 and 3.
		print(file_name)
		# A separate handle name keeps the open CSV file from being shadowed.
		with open(os.path.join(input_dir, file_name)) as page_file:
			myfile = page_file.read()
		# NOTE(review): no parser is named, so BeautifulSoup picks whichever
		# one is installed; pin it once the intended parser is confirmed.
		soup = BeautifulSoup(myfile)
		for vote in rollcall(soup) + articles(soup) + standup(soup) + secret(soup):
			vote['file'] = file_name
			my_writer.writerow(vote)